This code can be found in STExplorer’s GitHub repo as load_packages.R
## 1 Bioconductor ----
pkgBio <- c("Spaniel", "scater",
"batchelor", "scran", "DESeq2")
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
## Check if packages are installed and load them or install&load them if not.
pkg.check <- lapply(
pkgBio,
FUN <- function(x) {
if (!require(x, character.only = TRUE)) {
BiocManager::install(x, update = FALSE)
library(x, character.only = TRUE)
}
}
)
## 2 Cran ----
pkgCRAN <- c("Seurat", "cowplot",
"RColorBrewer",
"harmony", "dplyr",
"spdep", "sf", "jsonlite",
"tidyverse")
## Check if packages are installed and load them or install&load them if not.
pkg.check <- lapply(
pkgCRAN,
FUN <- function(x) {
if (!require(x, character.only = TRUE)) {
install.packages(x, dependencies = TRUE)
library(x, character.only = TRUE)
}
}
)
## 3 GitHub ----
pkgGit <- c("RachelQueen1/SCFunctionsV3",
"eddelbuettel/rbenchmark")
if (!require("devtools", quietly = TRUE))
install.packages("devtools")
## Check if packages are installed and load them or install&load them if not.
pkg.check <- lapply(
pkgGit,
FUN <- function(x) {
pkg.name <- sub(".*/", "", x)
if (!require(pkg.name, character.only = TRUE)) {
devtools::install_git(paste0("https://github.com/", x),
force = TRUE)
library(pkg.name, character.only = TRUE)
}
}
)
## 4 source scripts ----
source("./R/sf_coord_as_df.R")
source("./R/sfc_coord_as_df.R")
source("./R/readSpacerangerMD.R")
source("./R/readSpacerangerD.R")
This code can be found in STExplorer’s GitHub repo as make_folders.R
## Set file paths
projDir <- file.path(getwd(), "data")
inputDir <- file.path(projDir, "spaceranger_outs/")
outputDir <- file.path(projDir, "rObjects/")
csvDir <- file.path(projDir, "csvFiles/")
graphDir <- file.path(projDir, "graphics_out/")
## Check if inputDir/ outputDir/ csvDir exist and create them if not.
dirs <- c(inputDir, outputDir, csvDir, graphDir)
dirCheck <- lapply(
dirs,
FUN = function(x){
if(!dir.exists(x)) {
dir.create(x, recursive = TRUE)
print(paste0("Folder: ", x, " CREATED!"))
} else {
print(paste0("Folder: ", x, " EXISTS!"))
}
}
)
## [1] "Folder: /Users/b9047753/Library/CloudStorage/OneDrive-NewcastleUniversity/Projects/STExplorer/data/spaceranger_outs/ EXISTS!"
## [1] "Folder: /Users/b9047753/Library/CloudStorage/OneDrive-NewcastleUniversity/Projects/STExplorer/data/rObjects/ EXISTS!"
## [1] "Folder: /Users/b9047753/Library/CloudStorage/OneDrive-NewcastleUniversity/Projects/STExplorer/data/csvFiles/ EXISTS!"
## [1] "Folder: /Users/b9047753/Library/CloudStorage/OneDrive-NewcastleUniversity/Projects/STExplorer/data/graphics_out/ EXISTS!"
This code can be found in STExplorer’s GitHub repo as find_nb_pixelXY.R
## set the file paths to spaceranger's spatial and gene expression folders
sampleDir <- "Olfactory_Bulb/Olfactory_Bulb_A1_Results"
spatialDir <- file.path(inputDir, sampleDir, "spatial")
countsDir <- file.path(inputDir, sampleDir, "filtered_feature_bc_matrix")
## Import the dataset
inputMD <- readSpacerangerMD(spatialDir, res = "low") #read-in MetaData
inputD <- readSpacerangerD(countsDir) #read-in gene expression Data
As capture area we refer to the total number of spots that are on the slide
## Select spots in both bins (Sections) 0 and 1
spot_position <- inputMD %>%
select(c("Barcode", "pixel_x", "pixel_y", "Section"))
head(spot_position, 5)
## Barcode pixel_x pixel_y Section
## 1 ACGCCTGACACGCGCT-1 66.12 520.08 0
## 2 TACCGATCCAACACTT-1 72.00 516.66 0
## 3 ATTAAAGCGGACGAGC-1 66.12 513.24 0
## 4 GATAAGGGACGATTAG-1 72.00 509.88 0
## 5 GTGCAAATCACCAATA-1 66.12 506.46 0
## Convert spots to centroids
centroids <- spot_position %>%
st_as_sf(coords = c("pixel_x", "pixel_y"),
remove = FALSE)
head(centroids, 5)
## Simple feature collection with 5 features and 4 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 66.12 ymin: 506.46 xmax: 72 ymax: 520.08
## CRS: NA
## Barcode pixel_x pixel_y Section geometry
## 1 ACGCCTGACACGCGCT-1 66.12 520.08 0 POINT (66.12 520.08)
## 2 TACCGATCCAACACTT-1 72.00 516.66 0 POINT (72 516.66)
## 3 ATTAAAGCGGACGAGC-1 66.12 513.24 0 POINT (66.12 513.24)
## 4 GATAAGGGACGATTAG-1 72.00 509.88 0 POINT (72 509.88)
## 5 GTGCAAATCACCAATA-1 66.12 506.46 0 POINT (66.12 506.46)
## Combine the points into a multipoint geometry:
cntd_union <- st_union(centroids)
head(cntd_union)
## Geometry set for 1 feature
## Geometry type: MULTIPOINT
## Dimension: XY
## Bounding box: xmin: 65.64 ymin: 88.08 xmax: 521.34 ymax: 520.08
## CRS: NA
## MULTIPOINT ((65.64 91.98), (65.64 98.76), (65.7...
## Use the union of points to generate a voronoi object
voronoi <- st_voronoi(cntd_union, bOnlyEdges = TRUE)
head(voronoi)
## Geometry set for 1 feature
## Geometry type: MULTILINESTRING
## Dimension: XY
## Bounding box: xmin: -390.06 ymin: -367.62 xmax: 977.04 ymax: 975.78
## CRS: NA
## MULTILINESTRING ((69.57727 91.95, 67.64273 95.3...
## Create an enveloped voronoi tessellation around the tissue
voronoi_env <- st_intersection(st_cast(voronoi), st_convex_hull(cntd_union))
head(voronoi_env)
## Geometry set for 1 feature
## Geometry type: MULTILINESTRING
## Dimension: XY
## Bounding box: xmin: 65.64 ymin: 88.08 xmax: 521.34 ymax: 520.08
## CRS: NA
## MULTILINESTRING ((69.57727 91.95, 67.64273 95.3...
Plot and save the Voronoi tessellation
As bin_1 spots we refer to the on-tissue spots.
## Generate the POLYGONS from the MULTILINESTRING for bin_1 only and attach the
## barcode names
polygons <- st_polygonize(voronoi_env) %>% # polygonise the tessellation
st_cast() %>% # convert GEOMETRYCOLLECTION to multiple POLYGONS
st_sf() %>% # convert sfc object to sf for st_join afterwards
st_join(.,
centroids[centroids$Section == 1,],
join = st_contains,
left = FALSE) %>% # Join the centroids with the POLYGONS
mutate(Barcode_rn = Barcode) %>% # duplicate the barcode column
column_to_rownames("Barcode_rn") %>% # move duplicate column to row names
st_sf() # convert back to sf (mutate makes it a df)
## Create contiguity neighbours
neighbours <- poly2nb(polygons, snap = 0)
names(neighbours) = attr(neighbours, "region.id") # add names to the sub-lists
## Add number of neighbours for each polygon back to the polygons object
polygons$nb_count <- card(neighbours)
## Add the neighbour (nb) IDs as a nested df in the polygons object
nb_IDs <- neighbours %>%
nb2lines(., coords = polygons$geometry) %>% #get nb connecting lines
as("sf") %>% #convert to sf
st_drop_geometry() %>% #drop geometry column
select(i_ID, j_ID) %>% #select only nb ID columns
rename(nb_IDs = j_ID) %>% #rename the neighbours ID column
group_by(i_ID) %>% #group by spot
nest() #nest the groupings
polygons <- right_join(polygons, nb_IDs, by = c("Barcode" = "i_ID")) %>%
rename(nb_IDs = data, geom_pol = geometry)
## Update the polygon object to keep the centroid geometries as well
polygons <- left_join(as.data.frame(polygons), as.data.frame(centroids),
by = c("Barcode" = "Barcode"), suffix = c("", ".y")) %>%
select(!ends_with(".y")) %>%
rename(geom_cntd = geometry) %>%
st_sf(sf_column_name = "geom_pol")
Plot the polygons
## Calculate neighbour weights with a distance decay function
neighbours_wght <- nb2listwdist(neighbours, polygons$geom_cntd,
type = "idw", style = "raw", alpha = 1)
head(neighbours_wght$weights, 5)
## [[1]]
## [1] 0.1461988 0.1474926 0.1470099 0.1476604
##
## [[2]]
## [1] 0.1461988 0.1474926 0.1470099 0.1476604
##
## [[3]]
## [1] 0.1461988 0.1458960 0.1465318
##
## [[4]]
## [1] 0.1474926 0.1474926 0.1470099 0.1476604
##
## [[5]]
## [1] 0.1461988 0.1474926 0.1458960 0.1465318
This code can be found in STExplorer’s GitHub repo as find_nb_pixelXY.R
## Import gene counts
inputD <- readSpacerangerD(countsDir)
## Prepare for gene expression normalisation using DESeq2
spotName <- colnames(inputD)
spotTable <- data.frame(spotName = spotName)
dds <- DESeqDataSetFromMatrix(countData = inputD,
colData = spotTable,
design = ~spotName)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
dds = estimateSizeFactors(dds) # Estimate size factors
## Note: levels of factors in the design contain characters other than
## letters, numbers, '_' and '.'. It is recommended (but not required) to use
## only letters, numbers, and delimiters '_' or '.', as these are safe characters
## for column names in R. [This is a message, not a warning or an error]
head(sizeFactors(dds), 10) # Print the first 10 size factors
## AAACAAGTATCTCCCA-1 AAACCGGGTAGGTACC-1 AAACCGTTCGTCCAGG-1 AAACGAGACGGTTGAT-1
## 0.1355829 3.3937026 1.8759736 1.3502440
## AAACTGCTGGCTCCAA-1 AAACTTGCAAACGTAT-1 AAAGGCTCTCGCGCCG-1 AAAGTAGCATTGCTCA-1
## 1.7467547 0.2813147 1.4772664 0.6734527
## AAAGTGTGATTTATCT-1 AAAGTTGACTCCCGTA-1
## 1.7160711 1.1288676
counts = counts(dds, normalized = TRUE) # export normalised counts
head(counts, c(10,5)) # Print the first 10 gene counts
## AAACAAGTATCTCCCA-1 AAACCGGGTAGGTACC-1 AAACCGTTCGTCCAGG-1
## ENSMUSG00000051951 0 0.0000000 0.0000000
## ENSMUSG00000089699 0 0.0000000 0.0000000
## ENSMUSG00000102331 0 0.0000000 0.0000000
## ENSMUSG00000102343 0 0.0000000 0.0000000
## ENSMUSG00000025900 0 0.0000000 0.0000000
## ENSMUSG00000025902 0 0.0000000 0.0000000
## ENSMUSG00000104238 0 0.0000000 0.0000000
## ENSMUSG00000104328 0 0.0000000 0.0000000
## ENSMUSG00000033845 0 0.8839902 0.5330565
## ENSMUSG00000025903 0 0.0000000 1.5991696
## AAACGAGACGGTTGAT-1 AAACTGCTGGCTCCAA-1
## ENSMUSG00000051951 0.0000000 0.00000
## ENSMUSG00000089699 0.0000000 0.00000
## ENSMUSG00000102331 0.0000000 0.00000
## ENSMUSG00000102343 0.0000000 0.00000
## ENSMUSG00000025900 0.0000000 0.00000
## ENSMUSG00000025902 0.0000000 0.00000
## ENSMUSG00000104238 0.0000000 0.00000
## ENSMUSG00000104328 0.0000000 0.00000
## ENSMUSG00000033845 0.7406069 1.14498
## ENSMUSG00000025903 0.7406069 0.00000